To do

  • do outlier exclusion first and then fit equivalent split half models?
set.seed(42)

# dependencies
library(tidyverse)
library(metafor)
library(knitr)
library(kableExtra)
library(psych)
library(ggExtra)
library(scales) 
library(janitor)
library(parallel)
#library(ggtext) # devtools::install_github("clauswilke/ggtext")


# function to round all numeric vars in a data frame
# Round every numeric column of a data frame to `n_digits` decimal places,
# using janitor's round-half-up rule (0.5 always rounds away from zero,
# unlike base round()'s round-half-to-even). Non-numeric columns pass through.
#
# mutate_if() is superseded in dplyr; across(where(...)) is the current idiom.
round_df <- function(df, n_digits = 3) {
  df %>%
    mutate(across(where(is.numeric),
                  \(x) janitor::round_half_up(x, digits = n_digits)))
}

# Probability of superiority (common-language effect size / Vargha-Delaney A):
# the probability that a randomly drawn value from `x` exceeds a randomly
# drawn value from `y`, with ties counting one half.
#
# @param x numeric vector for the first group
# @param y numeric vector for the second group
# @return single numeric in [0, 1]; 0.5 indicates no stochastic superiority
probability_of_superiority_function <- function(x, y) {
  nx <- length(x)
  ny <- length(y)
  # sum of the ranks of x within the pooled sample (ties receive midranks);
  # seq_len() is safe for nx == 0, unlike 1:nx
  rank_sum_x <- sum(rank(c(x, y))[seq_len(nx)])
  # rank-sum formula for A, equivalent to Mann-Whitney U / (nx * ny)
  (rank_sum_x / nx - (nx + 1) / 2) / ny
}

# Spearman-Brown step-up correction of a split-half Pearson correlation.
# The correction is applied to |r| and the sign is restored afterwards, so
# negative inputs map into [-1, 0] instead of diverging towards -Inf
# (the naive formula is unbounded below for r < 0). Rounded half-up to 2 dp.
spearman_brown_correction <- function(r_pearson) {
  sign_r <- ifelse(r_pearson < 0, -1, 1)
  abs_r <- abs(r_pearson)
  corrected <- janitor::round_half_up(2 * abs_r / (1 + abs_r), 2)
  corrected * sign_r
}

# Format p values in APA style: "< .001" for very small p, otherwise an exact
# value rounded (half-up) to 3 decimals; the leading zero is stripped in both
# cases. NA inputs propagate to NA. Vectorized over `p`.
apa_p_value <- function(p){
  formatted <- ifelse(p < 0.001, "< .001",
                      ifelse(p >= 0.001,
                             paste("=", janitor::round_half_up(p, 3)),
                             NA))
  # strip the leading zero ("= 0.045" -> "= .045"); fixed = TRUE so "." is literal
  gsub(pattern = "0.", replacement = ".", x = formatted, fixed = TRUE)
}

# Build the plotmath label for a forest plot's RE-model summary row, reporting
# the fitted metafor model's tau^2, I^2 (as %), and H^2 heterogeneity metrics.
# bquote() substitutes the rounded values via .() when the function is called,
# so the returned expression carries fixed numbers, not live references to fit.
add_heterogeneity_metrics_to_forest <- function(fit) {
  bquote(paste("RE Model (", tau^2, " = ", .(formatC(janitor::round_half_up(fit$tau2, 2))), 
               ", ", I^2, " = ", .(formatC(janitor::round_half_up(fit$I2, 1))),
               "%, ", H^2," = ", .(formatC(janitor::round_half_up(fit$H2, 2))), ")"))
}

# table format in output
options(knitr.table.format = "html") 

# create directories needed to save output; showWarnings = FALSE keeps
# re-runs of the script silent when the directories already exist
dir.create("models", showWarnings = FALSE)
dir.create("plots", showWarnings = FALSE)

Data

# Pooled participant demographics across all included studies.
data_demographics <- 
  read_csv("../data/effect sizes for meta-analyses/data_demographics.csv")

# Split-half internal consistency of D scores: odd vs. even trials.
data_D_scores_internal_consistency_oddeventrials <- 
  read_csv("../data/effect sizes for meta-analyses/data_D_scores_internal_consistency_oddeventrials.csv")

# Split-half internal consistency of D scores: first vs. second half of task.
data_D_scores_internal_consistency_firstsecondhalves <- 
  read_csv("../data/effect sizes for meta-analyses/data_D_scores_internal_consistency_firstsecondhalves.csv") 

# Permutation-based alpha estimates for D scores (many random splits).
data_D_scores_internal_consistency_permuted_estimates <- 
  read_csv("../data/effect sizes for meta-analyses/data_D_scores_internal_consistency_permuted_estimates.csv")

# Permutation-based alpha estimates for the alternative A scoring algorithm.
data_A_scores_internal_consistency_permuted_estimates <- 
  read_csv("../data/effect sizes for meta-analyses/data_A_scores_internal_consistency_permuted_estimates.csv")

# Permutation-based alpha estimates for D scores, split by block order.
data_D_scores_internal_consistency_permuted_estimates_by_block_order <- 
  read_csv("../data/effect sizes for meta-analyses/data_D_scores_internal_consistency_permuted_estimates_by_block_order.csv") 

# Test-retest reliability of D scores as Pearson's r.
data_D_scores_test_retest_r <- 
  read_csv("../data/effect sizes for meta-analyses/data_D_scores_test_retest_r.csv")

# Test-retest reliability of D scores as intraclass correlations (ICC2).
data_D_scores_test_retest_icc <- 
  read_csv("../data/effect sizes for meta-analyses/data_D_scores_test_retest_icc.csv")

# using stricter exclusion criteria
data_D_scores_internal_consistency_permuted_estimates_stricter <- 
  read_csv("../data/effect sizes for meta-analyses/data_D_scores_internal_consistency_permuted_estimates_stricter.csv")

data_D_scores_test_retest_icc_stricter <- 
  read_csv("../data/effect sizes for meta-analyses/data_D_scores_test_retest_icc_stricter.csv")

Demographics

# Age: mean and SD across participants, rounded to 1 dp, as an HTML table.
data_demographics %>%
  summarise(mean_age = mean(age, na.rm = TRUE),
            sd_age   = sd(age, na.rm = TRUE)) %>%
  round_df(n_digits = 1) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE)
mean_age sd_age
19.9 3.8
# Gender counts (case-normalized), most frequent first.
# Naming the expression inside count() gives the table a clean "gender"
# column header instead of the literal "tolower(gender)" seen previously,
# matching the race and sexuality tables below.
data_demographics %>%
  count(gender = tolower(gender)) %>% 
  arrange(desc(n)) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
tolower(gender) n
female 824
male 517
other 3
# Race counts, NAs removed before display, most frequent first.
# (Dropping NA rows before sorting is equivalent to sorting first; this
# ordering matches the sexuality table below.)
data_demographics %>%
  count(race) %>%
  drop_na() %>%
  arrange(desc(n)) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE)
race n
white 315
black 202
hispanic/latino 62
asian 19
Other 10
multiracial 10
american indian/alaska native 3
arabic 1
asian, black, white 1
hispanic and african-american 1
multi 1
multi-racial 1
native hawaiian/other pacific islander 1
saudi 1
saudi arabia 1
white and hispanic 1
# Sexuality counts, NAs removed, most frequent first.
# count(sort = TRUE) orders descending by n, replacing a separate arrange().
data_demographics %>%
  count(sexuality, sort = TRUE) %>%
  drop_na() %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE)
sexuality n
heterosexual 524
bisexual 46
homosexual 23

Sample sizes

# Total N contributing to the internal-consistency meta-analyses.
data_D_scores_internal_consistency_permuted_estimates %>%
  summarise(total_ic_n = sum(n)) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE)
total_ic_n
1839
# Total N contributing to the test-retest meta-analyses.
data_D_scores_test_retest_icc %>%
  summarise(total_trt_n = sum(n)) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE)
total_trt_n
318

Internal consistency

Three methods of calculating internal consistency for the Implicit Relational Assessment Procedure (IRAP): - Odd vs even trials (most commonly reported in the literature) - First vs second half of task (more common for the most popular implicit measure, the IAT; would allow a like-for-like comparison) - Cronbach’s alpha via permutation (most robust)

The IRAP effect is most often quantified using the D1 scoring algorithm (Greenwald, Banaji & Nosek, 2003). i.e., trim all RTs > 10,000 ms, then D1 = (mean RT in the inconsistent blocks - mean RT in the consistent blocks)/SD of RTs in both block types. D1 scores are used throughout. Because of this, it wasn’t possible to use Sam Parson’s splithalf R package. A workflow for permutation-based alpha estimation using D scores is provided below.

In each case, correlations between D scores calculated from the split halves are calculated, and then the Spearman-Brown prophecy formula is applied, i.e., r_sb = 2*r_pearson / (1+r_pearson). Importantly, this correction is applied to the absolute value of the pearson’s correlation, and then its sign is corrected to be congruent with the pearson’s correlation. This ensures that r_sb has a possible range of -1 to +1, as correlations should (whereas correction of native negative correlations allow for a lower limit of -Inf). I haven’t seen this treatment of SB corrections applied to negative correlations well explicated elsewhere, so it is important to note here.

For meta-analysis, Spearman-Brown correlations are converted via Fisher's r-to-z transformation. Estimates of the meta effect are then converted back for reporting. Heterogeneity metrics represent heterogeneity in Fisher's r-to-z estimates. See here.

Split-half reliability via odd vs. even trials

Bonett transformations

# Fit a random-effects meta-analysis to the (Bonett-transformed) odd-vs-even
# split-half estimates; slab labels forest-plot rows by domain.
fit_oddeven <-  data_D_scores_internal_consistency_oddeventrials %>%
  rma(yi   = yi, 
      vi   = vi, 
      data = .,
      slab = domain)

# Predicted meta effect with 95% CI and credibility/prediction interval,
# still on the transformed scale.
# NOTE(review): rounding to 2 dp here, *before* back-transforming with
# transf.iabt below, can nudge the reported 2nd decimal — confirm intended.
predictions_oddeven <-
  predict(fit_oddeven, digits = 5) %>%
  as.data.frame() %>%
  round_df(2)

# Forest plot, back-transformed to the Spearman-Brown correlation scale;
# addcred = TRUE draws the credibility/prediction interval on the summary row.
metafor::forest(fit_oddeven,
                xlab = "Spearman-Brown correlation",
                addcred = TRUE,
                refline = FALSE,
                transf = transf.iabt,
                xlim = c(-1.4, 1.6),
                at = c(0, 0.25, 0.5, 0.75, 1),
                mlab = add_heterogeneity_metrics_to_forest(fit_oddeven))
# column headers placed just above the topmost study row (y = 46)
text(-1.4, 46, "Study", pos = 4)
text(1.6, 46, "Spearman-Brown correlation [95% CI]", pos = 2)

# One-line text summaries (back-transformed) used in the narrative below.
meta_effect <- 
  paste0("Meta analysis: k = ", fit_oddeven$k, 
         ", r_sb = ",  janitor::round_half_up(transf.iabt(predictions_oddeven$pred), 2), 
         ", 95% CI [", janitor::round_half_up(transf.iabt(predictions_oddeven$ci.lb), 2), ", ", 
         janitor::round_half_up(transf.iabt(predictions_oddeven$ci.ub), 2), "]", 
         ", 95% CR [", janitor::round_half_up(transf.iabt(predictions_oddeven$pi.lb), 2), ", ", 
         janitor::round_half_up(transf.iabt(predictions_oddeven$pi.ub), 2), "]") 

# Heterogeneity summary: Q test plus tau^2, I^2, H^2 on the transformed scale.
meta_heterogeneity <- 
  paste0("Heterogeneity tests: Q(df = ", fit_oddeven$k - 1, ") = ", janitor::round_half_up(fit_oddeven$QE, 2), 
         ", p ", ifelse(fit_oddeven$QEp < 0.0001, "< .0001", paste0("= ", as.character(janitor::round_half_up(fit_oddeven$QEp, 4)))),
         ", tau^2 = ", janitor::round_half_up(fit_oddeven$tau2, 2), 
         ", I^2 = ",   janitor::round_half_up(fit_oddeven$I2, 2),
         "%, H^2 = ",   janitor::round_half_up(fit_oddeven$H2, 2))

Meta effect: Meta analysis: k = 44, r_sb = 0.54, 95% CI [0.47, 0.6], 95% CR [0.18, 0.74].

Heterogeneity: Heterogeneity tests: Q(df = 43) = 77.69, p = 9e-04, tau^2 = 0.08, I^2 = 44.62%, H^2 = 1.81.

Split-half reliability via first vs. second half of the task

Bonett transformations

# Random-effects meta-analysis of the (Bonett-transformed) first-vs-second
# half split-half estimates; same pipeline as the odd-even analysis above.
fit_firstsecond <- data_D_scores_internal_consistency_firstsecondhalves %>%
  rma(yi   = yi, 
      vi   = vi, 
      data = .,
      slab = domain)

# Meta effect with 95% CI and credibility interval on the transformed scale.
# NOTE(review): rounded to 2 dp before the transf.iabt back-transform below,
# which can nudge the reported 2nd decimal — confirm intended.
predictions_firstsecond <-
  predict(fit_firstsecond, digits = 5) %>%
  as.data.frame() %>%
  round_df(2)

# Forest plot on the back-transformed Spearman-Brown correlation scale.
metafor::forest(fit_firstsecond,
                xlab = "Spearman-Brown correlation",
                addcred = TRUE,
                refline = FALSE,
                transf = transf.iabt,
                xlim = c(-1.4, 1.6),
                at = c(0, 0.25, 0.5, 0.75, 1),
                mlab = add_heterogeneity_metrics_to_forest(fit_firstsecond))
# column headers just above the topmost study row
text(-1.4, 46, "Study", pos = 4)
text(1.6, 46, "Spearman-Brown correlation [95% CI]", pos = 2)

# Back-transformed text summaries for the narrative below.
meta_effect <- 
  paste0("Meta analysis: k = ", fit_firstsecond$k, 
         ", r_sb = ",  janitor::round_half_up(transf.iabt(predictions_firstsecond$pred), 2), 
         ", 95% CI [", janitor::round_half_up(transf.iabt(predictions_firstsecond$ci.lb), 2), ", ", 
         janitor::round_half_up(transf.iabt(predictions_firstsecond$ci.ub), 2), "]", 
         ", 95% CR [", janitor::round_half_up(transf.iabt(predictions_firstsecond$pi.lb), 2), ", ", 
         janitor::round_half_up(transf.iabt(predictions_firstsecond$pi.ub), 2), "]") 

# Heterogeneity summary: Q test plus tau^2, I^2, H^2.
meta_heterogeneity <- 
  paste0("Heterogeneity tests: Q(df = ", fit_firstsecond$k - 1, ") = ", janitor::round_half_up(fit_firstsecond$QE, 2), 
         ", p ", ifelse(fit_firstsecond$QEp < 0.0001, "< .0001", paste0("= ", as.character(janitor::round_half_up(fit_firstsecond$QEp, 4)))),
         ", tau^2 = ", janitor::round_half_up(fit_firstsecond$tau2, 2), 
         ", I^2 = ",   janitor::round_half_up(fit_firstsecond$I2, 2),
         "%, H^2 = ",   janitor::round_half_up(fit_firstsecond$H2, 2))

Meta effect: Meta analysis: k = 44, r_sb = 0.48, 95% CI [0.41, 0.54], 95% CR [0.15, 0.68].

Heterogeneity: Heterogeneity tests: Q(df = 43) = 69.94, p = 0.0058, tau^2 = 0.06, I^2 = 36.3%, H^2 = 1.57.

Permutation-based split half correlations

i.e., an estimate of alpha through a large number of random half splits (see Parsons, Kruijt, & Fox, 2019). Estimate of alpha obtained via mean of the resampled Spearman-Brown corrected split half correlations, when calculating D1 scores for each half and sampling 10000 permutations.

Plot native untransformed data.

# Per-domain permutation-based alpha with its CI, flipped so domains run
# down the y axis (values untransformed).
ggplot(data_D_scores_internal_consistency_permuted_estimates,
       aes(x = domain, y = alpha)) +
  geom_linerange(aes(ymin = alpha_ci_lower, ymax = alpha_ci_upper)) +
  geom_point() +
  coord_flip() +
  labs(x = "Domain", y = "Alpha")

Bonett transformations

# Random-effects meta-analysis of the (Bonett-transformed) permutation-based
# alpha estimates — the most robust of the three internal-consistency methods.
fit_internal_consistency_permuted_estimates <- 
  data_D_scores_internal_consistency_permuted_estimates %>%
  rma(yi   = yi, 
      vi   = vi, 
      data = .,
      slab = domain)

# Meta effect with 95% CI and credibility interval on the transformed scale.
# NOTE(review): rounded to 2 dp before the transf.iabt back-transform below,
# which can nudge the reported 2nd decimal — confirm intended.
predictions_permutations <-
  predict(fit_internal_consistency_permuted_estimates, digits = 5) %>%
  as.data.frame() %>%
  round_df(2)

# Forest plot on the back-transformed Cronbach's alpha scale.
metafor::forest(fit_internal_consistency_permuted_estimates,
                xlab = bquote(paste("Cronbach's ", alpha)),
                addcred = TRUE,
                refline = FALSE,
                transf = transf.iabt,
                xlim = c(-1.4, 1.6),
                at = c(0, 0.25, 0.5, 0.75, 1),
                mlab = add_heterogeneity_metrics_to_forest(fit_internal_consistency_permuted_estimates))
# column headers just above the topmost study row
text(-1.4, 46, "Study", pos = 4)
text(1.6, 46, bquote(paste("Cronbach's ", alpha, " [95% CI]")), pos = 2)

# Back-transformed text summaries for the narrative below.
meta_effect <- 
  paste0("Meta analysis: k = ", fit_internal_consistency_permuted_estimates$k, 
         ", alpha = ",  janitor::round_half_up(transf.iabt(predictions_permutations$pred), 2), 
         ", 95% CI [", janitor::round_half_up(transf.iabt(predictions_permutations$ci.lb), 2), ", ", 
         janitor::round_half_up(transf.iabt(predictions_permutations$ci.ub), 2), "]", 
         ", 95% CR [", janitor::round_half_up(transf.iabt(predictions_permutations$pi.lb), 2), ", ", 
         janitor::round_half_up(transf.iabt(predictions_permutations$pi.ub), 2), "]") 

# Heterogeneity summary: Q test plus tau^2, I^2, H^2.
meta_heterogeneity <- 
  paste0("Heterogeneity tests: Q(df = ", fit_internal_consistency_permuted_estimates$k - 1, ") = ", janitor::round_half_up(fit_internal_consistency_permuted_estimates$QE, 2), 
         ", p ", ifelse(fit_internal_consistency_permuted_estimates$QEp < 0.0001, "< .0001", paste0("= ", as.character(janitor::round_half_up(fit_internal_consistency_permuted_estimates$QEp, 4)))),
         ", tau^2 = ", janitor::round_half_up(fit_internal_consistency_permuted_estimates$tau2, 2), 
         ", I^2 = ",   janitor::round_half_up(fit_internal_consistency_permuted_estimates$I2, 2),
         "%, H^2 = ",   janitor::round_half_up(fit_internal_consistency_permuted_estimates$H2, 2))

# persist the fitted model so the publication-quality PDF plot can be rebuilt
write_rds(fit_internal_consistency_permuted_estimates, "models/fit_internal_consistency_permuted_estimates.rds")

Meta effect: Meta analysis: k = 44, alpha = 0.53, 95% CI [0.47, 0.58], 95% CR [0.29, 0.69].

Heterogeneity: Heterogeneity tests: Q(df = 43) = 62.47, p = 0.0277, tau^2 = 0.04, I^2 = 28.57%, H^2 = 1.4.

Detection of effect sizes with undue influence on the meta effect size

Iteration 1

# influence diagnostics (leave-one-out statistics) for every effect size
influence_icc_permuted <- influence(fit_internal_consistency_permuted_estimates)

# one panel per diagnostic metric
plot(influence_icc_permuted, layout = c(8, 1))

# key linking the diagnostic plot's row numbers back to domain names
data_D_scores_internal_consistency_permuted_estimates %>%
  mutate(number = row_number()) %>%
  select(number, domain) %>%
  kable() %>%
  kable_classic(full_width = FALSE)
number domain
1 Body image
2 Clinton-Trump
3 Countries (1)
4 Countries (2)
5 Countries (3)
6 Countries (4)
7 Death (1)
8 Death (2)
9 Death (3)
10 Death (4)
11 Disgust (1)
12 Disgust (2)
13 Friend-Enemy
14 Gender stereotypes (1)
15 Gender stereotypes (2)
16 Gender stereotypes (3)
17 Gender stereotypes (4)
18 Lincoln-Hitler
19 Non-words
20 Personality - Agreeableness
21 Personality - Conscientiousness
22 Personality - Extraversion
23 Personality - Neuroticism
24 Personality - Openness
25 Professor competence
26 Race (1)
27 Race (2)
28 Religion
29 Rich-Poor
30 Self-competence (am)
31 Self-competence (want to be)
32 Sexuality (1)
33 Sexuality (2)
34 Shapes & colors (1)
35 Shapes & colors (2)
36 Shapes & colors (3)
37 Shapes & colors (4)
38 Shapes & colors (5)
39 Shapes & colors (6)
40 Shapes & colors (7)
41 Stigma - ADHD
42 Stigma - PTSD
43 Stigma - Schizophrenia
44 Valenced words
  • Sexuality domains should be excluded based on tau2.del

Iteration 2

# refit after removing the Sexuality domains flagged in iteration 1
fit_temp <-
  data_D_scores_internal_consistency_permuted_estimates %>%
  filter(!str_detect(domain, "Sexuality")) %>%
  rma(yi   = yi,
      vi   = vi,
      data = .,
      slab = domain)

# influence diagnostics for the reduced model
influence_icc_permuted_2 <- influence(fit_temp)

# one panel per diagnostic metric
plot(influence_icc_permuted_2, layout = c(8, 1))

# row-number-to-domain key for the reduced data set
data_D_scores_internal_consistency_permuted_estimates %>%
  filter(!str_detect(domain, "Sexuality")) %>%
  mutate(number = row_number()) %>%
  select(number, domain) %>%
  kable() %>%
  kable_classic(full_width = FALSE)
number domain
1 Body image
2 Clinton-Trump
3 Countries (1)
4 Countries (2)
5 Countries (3)
6 Countries (4)
7 Death (1)
8 Death (2)
9 Death (3)
10 Death (4)
11 Disgust (1)
12 Disgust (2)
13 Friend-Enemy
14 Gender stereotypes (1)
15 Gender stereotypes (2)
16 Gender stereotypes (3)
17 Gender stereotypes (4)
18 Lincoln-Hitler
19 Non-words
20 Personality - Agreeableness
21 Personality - Conscientiousness
22 Personality - Extraversion
23 Personality - Neuroticism
24 Personality - Openness
25 Professor competence
26 Race (1)
27 Race (2)
28 Religion
29 Rich-Poor
30 Self-competence (am)
31 Self-competence (want to be)
32 Shapes & colors (1)
33 Shapes & colors (2)
34 Shapes & colors (3)
35 Shapes & colors (4)
36 Shapes & colors (5)
37 Shapes & colors (6)
38 Shapes & colors (7)
39 Stigma - ADHD
40 Stigma - PTSD
41 Stigma - Schizophrenia
42 Valenced words
  • Gender stereotypes (3) should be excluded based on tau2.del

Sensitivity analysis

excluding Sexuality (1), Sexuality (2) and Gender stereotypes (3) as outliers with undue influence on the meta effect size. Without these domains, heterogeneity is reduced to zero.

# Sensitivity re-fit of the permutation-based alpha meta-analysis, excluding
# the three domains with undue influence (Sexuality 1 & 2, Gender
# stereotypes 3) identified by the influence diagnostics above.
fit_internal_consistency_permuted_estimates_sensitivity <- 
  data_D_scores_internal_consistency_permuted_estimates %>%
  filter(!str_detect(domain, "Sexuality")) |>
  filter(!domain %in% c("Gender stereotypes (3)")) %>%
  rma(yi   = yi, 
      vi   = vi, 
      data = .,
      slab = domain)

# Meta effect with 95% CI and credibility interval on the transformed scale.
# NOTE(review): rounded to 2 dp before the transf.iabt back-transform below,
# which can nudge the reported 2nd decimal — confirm intended.
predictions_permutations_sensitivity <-
  predict(fit_internal_consistency_permuted_estimates_sensitivity, digits = 5) %>%
  as.data.frame() %>%
  round_df(2)

# Forest plot on the back-transformed Cronbach's alpha scale.
metafor::forest(fit_internal_consistency_permuted_estimates_sensitivity,
                xlab = bquote(paste("Cronbach's ", alpha)),
                addcred = TRUE,
                refline = FALSE,
                transf = transf.iabt,
                xlim = c(-1.4, 1.6),
                at = c(0, 0.25, 0.5, 0.75, 1),
                mlab = add_heterogeneity_metrics_to_forest(fit_internal_consistency_permuted_estimates_sensitivity))
# headers at y = 43 here because three study rows were excluded (k = 41)
text(-1.4, 43, "Study", pos = 4)
text(1.6, 43, bquote(paste("Cronbach's ", alpha, " [95% CI]")), pos = 2)

# Back-transformed text summaries for the narrative below.
meta_effect <- 
  paste0("Meta analysis: k = ", fit_internal_consistency_permuted_estimates_sensitivity$k, 
         ", alpha = ",  janitor::round_half_up(transf.iabt(predictions_permutations_sensitivity$pred), 2), 
         ", 95% CI [", janitor::round_half_up(transf.iabt(predictions_permutations_sensitivity$ci.lb), 2), ", ", 
         janitor::round_half_up(transf.iabt(predictions_permutations_sensitivity$ci.ub), 2), "]", 
         ", 95% CR [", janitor::round_half_up(transf.iabt(predictions_permutations_sensitivity$pi.lb), 2), ", ", 
         janitor::round_half_up(transf.iabt(predictions_permutations_sensitivity$pi.ub), 2), "]") 

# Heterogeneity summary: Q test plus tau^2, I^2, H^2.
meta_heterogeneity <- 
  paste0("Heterogeneity tests: Q(df = ", fit_internal_consistency_permuted_estimates_sensitivity$k - 1, ") = ", janitor::round_half_up(fit_internal_consistency_permuted_estimates_sensitivity$QE, 2), 
         ", p ", ifelse(fit_internal_consistency_permuted_estimates_sensitivity$QEp < 0.0001, "< .0001", paste0("= ", as.character(janitor::round_half_up(fit_internal_consistency_permuted_estimates_sensitivity$QEp, 4)))),
         ", tau^2 = ", janitor::round_half_up(fit_internal_consistency_permuted_estimates_sensitivity$tau2, 2), 
         ", I^2 = ",   janitor::round_half_up(fit_internal_consistency_permuted_estimates_sensitivity$I2, 2),
         "%, H^2 = ",   janitor::round_half_up(fit_internal_consistency_permuted_estimates_sensitivity$H2, 2))

# persist the fitted model so the publication-quality PDF plot can be rebuilt
write_rds(fit_internal_consistency_permuted_estimates_sensitivity, "models/fit_internal_consistency_permuted_estimates_sensitivity.rds")

Meta effect: Meta analysis: k = 41, alpha = 0.49, 95% CI [0.43, 0.54], 95% CR [0.43, 0.54].

Heterogeneity: Heterogeneity tests: Q(df = 40) = 29.69, p = 0.8837, tau^2 = 0, I^2 = 0%, H^2 = 1.

Test-retest reliability

Pearson’s r correlations

with r-to-z transformations.

# Random-effects meta-analysis of test-retest Pearson's r (Fisher r-to-z
# transformed effect sizes). The Sexuality (1) label is annotated with its
# follow-up interval to distinguish it in the forest plot.
fit_test_retest_r <- data_D_scores_test_retest_r %>%
  mutate(domain = ifelse(domain == "Sexuality (1)", "Sexuality (1) 7-day followup", domain)) %>%
  rma(yi   = yi, 
      vi   = vi, 
      data = .,
      slab = domain) 

# Meta effect with 95% CI and credibility interval, on the z scale.
# NOTE(review): rounded to 2 dp before the transf.ztor back-transform below,
# which can nudge the reported 2nd decimal — confirm intended.
predictions_test_retest_r <-
  predict(fit_test_retest_r, digits = 5) %>%
  as.data.frame() %>%
  round_df(2) 

# Forest plot back-transformed to the correlation scale.
metafor::forest(fit_test_retest_r,
                xlab = "Pearson's r correlations",
                transf = transf.ztor,
                addcred = TRUE,
                refline = FALSE,
                xlim = c(-2.5, 2),
                at = c(-1, -0.5, 0, 0.5, 1),
                mlab = add_heterogeneity_metrics_to_forest(fit_test_retest_r))
# column headers just above the k = 8 study rows (y = 10)
text(-2.5, 10, "Study", pos = 4)
text(2, 10, "r [95% CI]", pos = 2)

# Back-transformed text summaries for the narrative below.
meta_effect <- 
  paste0("Meta analysis: k = ", fit_test_retest_r$k, 
         ", r = ",  janitor::round_half_up(transf.ztor(predictions_test_retest_r$pred), 2), 
         ", 95% CI [", janitor::round_half_up(transf.ztor(predictions_test_retest_r$ci.lb), 2), ", ", 
         janitor::round_half_up(transf.ztor(predictions_test_retest_r$ci.ub), 2), "]", 
         ", 95% CR [", janitor::round_half_up(transf.ztor(predictions_test_retest_r$pi.lb), 2), ", ", 
         janitor::round_half_up(transf.ztor(predictions_test_retest_r$pi.ub), 2), "]") 

# Heterogeneity summary: Q test plus tau^2, I^2, H^2 on the z scale.
meta_heterogeneity <- 
  paste0("Heterogeneity tests: Q(df = ", fit_test_retest_r$k - 1, ") = ", janitor::round_half_up(fit_test_retest_r$QE, 2), 
         ", p ", ifelse(fit_test_retest_r$QEp < 0.0001, "< .0001", paste0("= ", as.character(janitor::round_half_up(fit_test_retest_r$QEp, 4)))),
         ", tau^2 = ", janitor::round_half_up(fit_test_retest_r$tau2, 2), 
         ", I^2 = ",   janitor::round_half_up(fit_test_retest_r$I2, 2),
         "%, H^2 = ",   janitor::round_half_up(fit_test_retest_r$H2, 2))

# persist the fitted model so the publication-quality PDF plot can be rebuilt
write_rds(fit_test_retest_r, "models/fit_test_retest_r.rds")

Meta effect: Meta analysis: k = 8, r = 0.12, 95% CI [-0.11, 0.34], 95% CR [-0.45, 0.62].

Heterogeneity: Heterogeneity tests: Q(df = 7) = 24.04, p = 0.0011, tau^2 = 0.08, I^2 = 73.71%, H^2 = 3.8.

Intraclass Correlations

“It has been argued that test-retest reliability should reflect absolute agreement, rather than consistency, between measurements (Koo & Li, 2016). For example, a perfect correlation between scores at two time points may occur also when there is a systematic difference between time points (i.e. a difference that is about equal for all participants).” (Parsons, Kruijt, & Fox, 2019). Absolute agreement therefore also takes within-participant changes into account in its denominator, where consistency does not.

# Random-effects meta-analysis of test-retest ICCs (absolute agreement).
# ICCs are analyzed untransformed, with their standard errors passed via sei.
fit_test_retest_icc <- data_D_scores_test_retest_icc %>%
  mutate(domain = ifelse(domain == "Sexuality (1)", "Sexuality (1) 7-day followup", domain)) %>%
  rma(yi   = ICC, 
      sei  = se, 
      data = .,
      slab = domain)

# Meta effect with 95% CI and credibility interval (already on the ICC scale,
# so no back-transform is needed below).
predictions_test_retest_icc <-
  predict(fit_test_retest_icc, digits = 5) %>%
  as.data.frame() %>%
  round_df(2) 

# earlier plot version kept for reference (narrower x limits)
# metafor::forest(fit_test_retest_icc,
#                 xlab = "ICC2",
#                 addcred = TRUE,
#                 refline = FALSE,
#                 xlim = c(-1.5, 1.7),
#                 at = c(-0.5, -0.25, 0, 0.25, 0.5, 0.75, 1),
#                 mlab = add_heterogeneity_metrics_to_forest(fit_test_retest_icc))
# text(-1.5, 10, "Study", pos = 4)
# text(1.7, 10, "ICC2 [95% CI]", pos = 2)

# Forest plot of the untransformed ICCs.
metafor::forest(fit_test_retest_icc,
                xlab = "Intraclass Correlation",
                addcred = TRUE,
                refline = FALSE,
                xlim = c(-3, 2.3),
                #at = c(-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1),
                at = c(-1, -0.5, 0, 0.5, 1),
                mlab = add_heterogeneity_metrics_to_forest(fit_test_retest_icc))
# column headers just above the k = 8 study rows (y = 10)
text(-3, 10, "Study", pos = 4)
text(2.3, 10, "ICC2 [95% CI]", pos = 2)

# Text summaries for the narrative below (no back-transform: raw ICC scale).
meta_effect <- 
  paste0("Meta analysis: k = ", fit_test_retest_icc$k, 
         ", ICC2 = ",  janitor::round_half_up(predictions_test_retest_icc$pred, 2), 
         ", 95% CI [", janitor::round_half_up(predictions_test_retest_icc$ci.lb, 2), ", ", 
         janitor::round_half_up(predictions_test_retest_icc$ci.ub, 2), "]", 
         ", 95% CR [", janitor::round_half_up(predictions_test_retest_icc$pi.lb, 2), ", ", 
         janitor::round_half_up(predictions_test_retest_icc$pi.ub, 2), "]") 

# Heterogeneity summary: Q test plus tau^2, I^2, H^2.
meta_heterogeneity <- 
  paste0("Heterogeneity tests: Q(df = ", fit_test_retest_icc$k - 1, ") = ", janitor::round_half_up(fit_test_retest_icc$QE, 2), 
         ", p ", ifelse(fit_test_retest_icc$QEp < 0.0001, "< .0001", paste0("= ", as.character(janitor::round_half_up(fit_test_retest_icc$QEp, 4)))),
         ", tau^2 = ", janitor::round_half_up(fit_test_retest_icc$tau2, 2), 
         ", I^2 = ",   janitor::round_half_up(fit_test_retest_icc$I2, 2),
         "%, H^2 = ",   janitor::round_half_up(fit_test_retest_icc$H2, 2))

# persist the fitted model so the publication-quality PDF plot can be rebuilt
write_rds(fit_test_retest_icc, "models/fit_test_retest_icc.rds")

Meta effect: Meta analysis: k = 8, ICC2 = 0.1, 95% CI [-0.11, 0.32], 95% CR [-0.48, 0.69].

Heterogeneity: Heterogeneity tests: Q(df = 7) = 28.84, p = 2e-04, tau^2 = 0.08, I^2 = 78.75%, H^2 = 4.71.

Detection of effect sizes with undue influence on the meta effect size

# influence diagnostics (leave-one-out statistics) for the ICC meta-analysis
influence_trt <- influence(fit_test_retest_icc)

# one panel per diagnostic metric
plot(influence_trt, layout = c(8, 1))

# key linking the diagnostic plot's row numbers back to domain names
data_D_scores_test_retest_icc %>%
  mutate(number = row_number()) %>%
  select(number, domain) %>%
  kable() %>%
  kable_classic(full_width = FALSE)
number domain
1 Body image
2 Disgust (1)
3 Friend-Enemy
4 Gender stereotypes (1)
5 Lincoln-Hitler
6 Race (1)
7 Religion
8 Sexuality (1)

No outliers detected

Power implications

Maximum observed correlation of two measures is a function of their true correlation and the reliability (i.e., self-correlation) of each measure.

\(r_{xy}=\frac{\rho_{xy}}{\sqrt{\rho_{xx}\rho_{yy}}}\)

Internal Consistency

# Attenuation formula: the observed correlation between two measures given
# their true correlation and each measure's reliability, rounded half-up to
# 2 dp.
observed_r <- function(true_r, reliability_a, reliability_b) {
  janitor::round_half_up(true_r * sqrt(reliability_a * reliability_b), 2)
}

# ceiling on the observed correlation with a perfectly reliable external
# measure (true r = 1.0)
max_cor <- observed_r(true_r = 1.0,
                      reliability_a = transf.iabt(predictions_permutations_sensitivity$pred),
                      reliability_b = 1.0)

# expected observed correlation when the true correlation is medium (r = .5)
medium_cor <- observed_r(true_r = 0.5,
                         reliability_a = transf.iabt(predictions_permutations_sensitivity$pred),
                         reliability_b = 1.0)

Given this meta estimate of reliability, assuming that the internal consistency of an external variable is perfect (1.0), the maximum correlation between the IRAP and that external variable (i.e., when true correlation is 1.0) is 0.7. For a true correlation of medium size (r = .5), the observed correlation would be 0.35.

Test-retest

# Same attenuation calculations as above, but using the meta-analytic
# test-retest (ICC2) estimate as the IRAP's reliability.
max_cor <- observed_r(
  true_r = 1.0,
  reliability_a = predictions_test_retest_icc$pred,
  reliability_b = 1.0
)

medium_cor <- observed_r(
  true_r = 0.5,
  reliability_a = predictions_test_retest_icc$pred,
  reliability_b = 1.0
)

Given this meta estimate of reliability, assuming that the internal consistency of an external variable is perfect (1.0), the maximum correlation between the IRAP and that external variable (i.e., when true correlation is 1.0) is 0.32. For a true correlation of medium size (r = .5), the observed correlation would be 0.16.

Potential ways to improve reliability

Lengthening the task

Although there was some variation in test lengths, this was in a very small range of domains that also appeared to be particularly stable ones. It’s not meaningful to do moderation meta analysis on the existing data.

Internal Consistency

The Spearman-Brown prediction formula can be rearranged to make prediction about how the length of the test would need to change to meet a goal reliability:

\(\rho_{xx'}^*=\frac {n\rho_{xx'}}{1+(n-1)\rho_{xx'}}\)

where \(\rho_{xx'}^*\) refers to the goal reliability, \(\rho_{xx'}\) refers to the current reliability, and \(n\) refers to the test length multiplier.

# Rearranged Spearman-Brown prophecy: length multiplier n needed to raise
# current reliability rho to goal reliability rho*:
#   n = (rho* * (1 - rho)) / (rho * (1 - rho*))
current_ic <- transf.iabt(predictions_permutations_sensitivity$pred)

goal_ic <- 0.70
ic_length_increase_.70 <-
  janitor::round_half_up((goal_ic * (1 - current_ic)) / (current_ic * (1 - goal_ic)), 1)

goal_ic <- 0.80
ic_length_increase_.80 <-
  janitor::round_half_up((goal_ic * (1 - current_ic)) / (current_ic * (1 - goal_ic)), 1)

goal_ic <- 0.90
ic_length_increase_.90 <-
  janitor::round_half_up((goal_ic * (1 - current_ic)) / (current_ic * (1 - goal_ic)), 1)

Using \(a\) = 0.49, the IRAP would have to be 2.4 times longer for it to have an internal consistency of >=.70, 4.2 times longer for it to have an internal consistency of >=.80, or 9.4 times longer for it to have an internal consistency of >=.90.

Test-retest

The Spearman-Brown prediction formula can be rearranged to make prediction about how the length of the test would need to change to meet a goal reliability:

\({\rho }_{xx'}^{*}={\frac {n{\rho }_{xx'}}{1+(n-1){\rho }_{xx'}}}\)

where \({\rho }_{xx'}^{*}\) refers to the goal reliability, \({\rho }_{xx'}\) refers to the current reliability, and \({n}\) refers to the test length multiplier.

# Same rearranged Spearman-Brown calculation, applied to the meta-analytic
# test-retest (ICC2) estimate.
current_test_retest <- predictions_test_retest_icc$pred

goal_test_retest <- 0.70
trt_length_increase_.70 <-
  janitor::round_half_up((goal_test_retest * (1 - current_test_retest)) /
                           (current_test_retest * (1 - goal_test_retest)), 1)

goal_test_retest <- 0.80
trt_length_increase_.80 <-
  janitor::round_half_up((goal_test_retest * (1 - current_test_retest)) /
                           (current_test_retest * (1 - goal_test_retest)), 1)

goal_test_retest <- 0.90
trt_length_increase_.90 <-
  janitor::round_half_up((goal_test_retest * (1 - current_test_retest)) /
                           (current_test_retest * (1 - goal_test_retest)), 1)

Using the absolute agreement estimate of 0.1, the IRAP would have to be 21 times longer for it to have a test-retest of >=.70, 36 times longer for it to have a test-retest of >=.80, or 81 times longer for it to have a test-retest of >=.90.

Alternative scoring algorithm: A scores

Internal consistency using permutation approach sensitivity analysis, comparing D scoring and A scoring. Multilevel moderator meta analyses, acknowledging non-independence between scores produced for each domain, and moderator by scoring type.

# Multilevel moderator meta-analysis comparing D vs. A scoring of the same
# domains. A random intercept per domain acknowledges non-independence
# between the two scoring variants of each domain; `scoring` is the
# moderator. Sexuality domains are excluded, as in the sensitivity analysis.
fit_internal_consistency_D_vs_A_scores <- 
  bind_rows(data_A_scores_internal_consistency_permuted_estimates %>%
              filter(!str_detect(domain, "Sexuality")) %>%
              mutate(scoring = "A"),
            data_D_scores_internal_consistency_permuted_estimates %>%
              filter(!str_detect(domain, "Sexuality")) %>%
              mutate(scoring = "D")) %>%
  # make D the reference level so the intercept is the D-scoring estimate
  mutate(scoring = fct_relevel(scoring, "D", "A")) %>%
  rma.mv(yi     = yi, 
         V      = vi, 
         random = ~ 1 | domain,
         mods   = ~ scoring,
         data   = .,
         slab   = domain)

# print the model summary
fit_internal_consistency_D_vs_A_scores
## 
## Multivariate Meta-Analysis Model (k = 84; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed  factor 
## sigma^2    0.0440  0.2098     42     no  domain 
## 
## Test for Residual Heterogeneity:
## QE(df = 82) = 80.5620, p-val = 0.5242
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 1.0002, p-val = 0.3173
## 
## Model Results:
## 
##           estimate      se     zval    pval    ci.lb   ci.ub      
## intrcpt     0.7056  0.0609  11.5819  <.0001   0.5862  0.8250  *** 
## scoringA    0.0685  0.0685   1.0001  0.3173  -0.0657  0.2026      
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Back-transformed alpha estimate per scoring method: row "D" is the model
# intercept; row "A" adds the scoring moderator coefficient.
# NOTE(review): the A-row CI is formed by offsetting the intercept by the CI
# of the moderator coefficient, which ignores the covariance between the two
# coefficients — predict() with a moderator matrix would be exact; confirm
# this approximation is intended.
tibble(response_options = c("D", "A"),
       alpha = c(fit_internal_consistency_D_vs_A_scores$b[1],
                 fit_internal_consistency_D_vs_A_scores$b[1] + 
                   fit_internal_consistency_D_vs_A_scores$b[2]),
       ci_lower = c(fit_internal_consistency_D_vs_A_scores$ci.lb[1],
                    fit_internal_consistency_D_vs_A_scores$b[1] +
                      fit_internal_consistency_D_vs_A_scores$ci.lb[2]),
       ci_upper = c(fit_internal_consistency_D_vs_A_scores$ci.ub[1], 
                    fit_internal_consistency_D_vs_A_scores$b[1] + 
                      fit_internal_consistency_D_vs_A_scores$ci.ub[2])) %>%
  # back-transform from the Bonett scale to alpha, then round for display
  mutate_if(is.numeric, transf.iabt) %>%
  mutate_if(is.numeric, round, digits = 2) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
response_options alpha ci_lower ci_upper
D 0.51 0.44 0.56
A 0.54 0.47 0.60

Use only one block order

Internal consistency using the permutation approach: sensitivity analysis via a multilevel moderator meta-analysis comparing block orders, for the domains that include both orders.

# Moderator meta-analysis comparing block orders, restricted to domains
# other than Sexuality; random intercept per domain accounts for the
# non-independence of the two estimates coming from the same domain.
fit_internal_consistency_block_order <-
  rma.mv(yi     = yi,
         V      = vi,
         mods   = ~ block_order,
         random = ~ 1 | domain,
         data   = data_D_scores_internal_consistency_permuted_estimates_by_block_order %>%
           filter(str_detect(domain, "Sexuality", negate = TRUE)),
         slab   = paste(domain, block_order))

fit_internal_consistency_block_order
## 
## Multivariate Meta-Analysis Model (k = 50; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed  factor 
## sigma^2    0.0103  0.1014     25     no  domain 
## 
## Test for Residual Heterogeneity:
## QE(df = 48) = 27.5846, p-val = 0.9921
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 1.8002, p-val = 0.1797
## 
## Model Results:
## 
##                                estimate      se    zval    pval    ci.lb 
## intrcpt                          0.6106  0.0934  6.5373  <.0001   0.4275 
## block_orderinconsistent first    0.1773  0.1321  1.3417  0.1797  -0.0817 
##                                 ci.ub      
## intrcpt                        0.7936  *** 
## block_orderinconsistent first  0.4362      
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model-implied alpha (back-transformed from Bonett's scale) per block
# order, with 95% CIs.
# Fix: the original computed the "incon first" interval as
# b[1] + ci.lb/ci.ub of b[2], which ignores Var(b1) and Cov(b1, b2);
# predict() with newmods yields the correct interval for each cell mean.
preds_block_order <- predict(fit_internal_consistency_block_order,
                             newmods = c(0, 1))  # 0 = con first (reference)

tibble(block_order = c("con first", "incon first"),
       alpha    = as.vector(preds_block_order$pred),
       ci_lower = as.vector(preds_block_order$ci.lb),
       ci_upper = as.vector(preds_block_order$ci.ub)) %>%
  mutate(across(where(is.numeric), transf.iabt)) %>%
  mutate(across(where(is.numeric), \(x) round(x, digits = 2))) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
block_order alpha ci_lower ci_upper
con first 0.46 0.35 0.55
incon first 0.55 0.41 0.65

Use only either fixed or moving response options

Internal consistency using the permutation approach: sensitivity analysis via a moderator meta-analysis comparing response option locations across domains.

# Fixed vs moving response-option moderator analysis. Each domain
# contributes a single estimate, so a standard random-effects
# meta-regression (rma) is used rather than a multilevel model.
response_location_data <-
  data_D_scores_internal_consistency_permuted_estimates %>%
  filter(str_detect(domain, "Sexuality", negate = TRUE),
         !is.na(response_option_locations))

fit_internal_consistency_response_option_locations <-
  rma(yi   = yi,
      vi   = vi,
      mods = ~ response_option_locations,
      data = response_location_data,
      slab = paste(domain, response_option_locations))

fit_internal_consistency_response_option_locations
## 
## Mixed-Effects Model (k = 42; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of residual heterogeneity):     0 (SE = 0.0183)
## tau (square root of estimated tau^2 value):             0
## I^2 (residual heterogeneity / unaccounted variability): 0.00%
## H^2 (unaccounted variability / sampling variability):   1.00
## R^2 (amount of heterogeneity accounted for):            100.00%
## 
## Test for Residual Heterogeneity:
## QE(df = 40) = 23.9033, p-val = 0.9795
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 10.8200, p-val = 0.0010
## 
## Model Results:
## 
##                                  estimate      se     zval    pval    ci.lb 
## intrcpt                            0.9254  0.0826  11.2009  <.0001   0.7635 
## response_option_locationsMoving   -0.3354  0.1020  -3.2894  0.0010  -0.5352 
##                                    ci.ub      
## intrcpt                           1.0873  *** 
## response_option_locationsMoving  -0.1355   ** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model-implied alpha (back-transformed from Bonett's scale) per
# response-option location, with 95% CIs.
# Fix: the original computed the "moving" interval as
# b[1] + ci.lb/ci.ub of b[2], which ignores Var(b1) and Cov(b1, b2);
# predict() with newmods yields the correct interval for each cell mean.
preds_response_locations <- predict(fit_internal_consistency_response_option_locations,
                                    newmods = c(0, 1))  # 0 = static (reference)

tibble(response_options = c("static", "moving"),
       alpha    = as.vector(preds_response_locations$pred),
       ci_lower = as.vector(preds_response_locations$ci.lb),
       ci_upper = as.vector(preds_response_locations$ci.ub)) %>%
  mutate(across(where(is.numeric), transf.iabt)) %>%
  mutate(across(where(is.numeric), \(x) round(x, digits = 2))) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
response_options alpha ci_lower ci_upper
static 0.60 0.53 0.66
moving 0.45 0.32 0.55

Use stricter performance exclusion criteria

Internal consistency using the permutation approach: sensitivity analysis via a moderator meta-analysis comparing performance exclusion criteria.

  • Typical criteria require accuracy >= 78% and median RT < 2000ms on both con and incon blocks. It has a roughly 10% exclusion rate.
  • Stricter criteria require accuracy >= 78% and median RT < 2000ms on both con and incon blocks and within each block. It has a roughly 28% exclusion rate.
# Moderator meta-analysis comparing typical vs stricter performance
# exclusion criteria, with a random intercept per domain.
# Fix: the original re-applied filter(!str_detect(domain, "Sexuality") &
# !is.na(exclusions)) after bind_rows, but Sexuality was already excluded
# in both branches and exclusions is assigned a constant in each branch so
# it can never be NA — that second filter was redundant and is dropped.
fit_internal_consistency_performance_criteria <- 
  bind_rows(data_D_scores_internal_consistency_permuted_estimates %>%
              filter(!str_detect(domain, "Sexuality")) %>%
              mutate(exclusions = "typical"),
            data_D_scores_internal_consistency_permuted_estimates_stricter %>%
              filter(!str_detect(domain, "Sexuality")) %>%
              mutate(exclusions = "stricter")) %>%
  # "typical" first so it is the reference level (the intercept)
  mutate(exclusions = fct_relevel(exclusions, "typical", "stricter")) %>%
  rma.mv(yi     = yi, 
         V      = vi, 
         mods   = ~ exclusions,
         random = ~ 1 | domain,
         data   = .,
         slab   = paste(domain, exclusions))

fit_internal_consistency_performance_criteria
## 
## Multivariate Meta-Analysis Model (k = 77; method: REML)
## 
## Variance Components:
## 
##             estim    sqrt  nlvls  fixed  factor 
## sigma^2    0.0367  0.1916     42     no  domain 
## 
## Test for Residual Heterogeneity:
## QE(df = 75) = 66.5264, p-val = 0.7469
## 
## Test of Moderators (coefficient 2):
## QM(df = 1) = 0.4866, p-val = 0.4855
## 
## Model Results:
## 
##                     estimate      se     zval    pval    ci.lb   ci.ub      
## intrcpt               0.7005  0.0594  11.7921  <.0001   0.5840  0.8169  *** 
## exclusionsstricter   -0.0538  0.0771  -0.6975  0.4855  -0.2048  0.0973      
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model-implied alpha (back-transformed from Bonett's scale) per exclusion
# criterion, with 95% CIs.
# Fix: the original computed the "stricter" interval as
# b[1] + ci.lb/ci.ub of b[2], which ignores Var(b1) and Cov(b1, b2);
# predict() with newmods yields the correct interval for each cell mean.
preds_exclusions <- predict(fit_internal_consistency_performance_criteria,
                            newmods = c(0, 1))  # 0 = typical (reference)

tibble(exclusions = c("typical", "stricter"),
       alpha    = as.vector(preds_exclusions$pred),
       ci_lower = as.vector(preds_exclusions$ci.lb),
       ci_upper = as.vector(preds_exclusions$ci.ub)) %>%
  mutate(across(where(is.numeric), transf.iabt)) %>%
  mutate(across(where(is.numeric), \(x) round(x, digits = 2))) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
exclusions alpha ci_lower ci_upper
typical 0.50 0.44 0.56
stricter 0.48 0.39 0.55

Session info

# Record R version, platform, and attached package versions for reproducibility
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.2.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] janitor_2.1.0    scales_1.2.1     ggExtra_0.10.0   psych_2.2.9     
##  [5] kableExtra_1.3.4 knitr_1.42       metafor_3.8-1    metadat_1.2-0   
##  [9] Matrix_1.5-3     lubridate_1.9.2  forcats_1.0.0    stringr_1.5.0   
## [13] dplyr_1.1.0      purrr_1.0.1      readr_2.1.4      tidyr_1.3.0     
## [17] tibble_3.1.8     ggplot2_3.4.1    tidyverse_2.0.0 
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.5        sass_0.4.5        bit64_4.0.5       vroom_1.6.1      
##  [5] jsonlite_1.8.4    viridisLite_0.4.1 bslib_0.4.2       shiny_1.7.4      
##  [9] highr_0.10        yaml_2.3.7        pillar_1.8.1      lattice_0.20-45  
## [13] glue_1.6.2        digest_0.6.31     promises_1.2.0.1  rvest_1.0.3      
## [17] snakecase_0.11.0  colorspace_2.1-0  htmltools_0.5.4   httpuv_1.6.8     
## [21] pkgconfig_2.0.3   xtable_1.8-4      webshot_0.5.4     svglite_2.1.1    
## [25] later_1.3.0       tzdb_0.3.0        timechange_0.2.0  farver_2.1.1     
## [29] generics_0.1.3    ellipsis_0.3.2    cachem_1.0.7      withr_2.5.0      
## [33] cli_3.6.0         mnormt_2.1.1      magrittr_2.0.3    crayon_1.5.2     
## [37] mime_0.12         evaluate_0.20     fansi_1.0.4       nlme_3.1-157     
## [41] xml2_1.3.3        tools_4.2.1       hms_1.1.2         lifecycle_1.0.3  
## [45] munsell_0.5.0     compiler_4.2.1    jquerylib_0.1.4   systemfonts_1.0.4
## [49] rlang_1.0.6       grid_4.2.1        rstudioapi_0.14   miniUI_0.1.1.1   
## [53] labeling_0.4.2    rmarkdown_2.20    gtable_0.3.1      R6_2.5.1         
## [57] fastmap_1.1.1     bit_4.0.5         utf8_1.2.3        mathjaxr_1.6-0   
## [61] stringi_1.7.12    Rcpp_1.0.10       vctrs_0.5.2       tidyselect_1.2.0 
## [65] xfun_0.37